<!DOCTYPE html>
<html lang="en">
 <head>
  <meta charset="utf-8"/>
  <meta content="width=device-width, initial-scale=1.0" name="viewport"/>
  <meta content="Docutils 0.17.1: http://docutils.sourceforge.net/" name="generator"/>
  <meta content="ie=edge" http-equiv="x-ua-compatible"/>
  <meta content="Copy to clipboard" name="lang:clipboard.copy"/>
  <meta content="Copied to clipboard" name="lang:clipboard.copied"/>
  <meta content="en" name="lang:search.language"/>
  <meta content="True" name="lang:search.pipeline.stopwords"/>
  <meta content="True" name="lang:search.pipeline.trimmer"/>
  <meta content="No matching documents" name="lang:search.result.none"/>
  <meta content="1 matching document" name="lang:search.result.one"/>
  <meta content="# matching documents" name="lang:search.result.other"/>
  <meta content="[\s\-]+" name="lang:search.tokenizer"/>
  <link crossorigin="" href="https://fonts.gstatic.com/" rel="preconnect"/>
  <link href="https://fonts.googleapis.com/css?family=Roboto+Mono:400,500,700|Roboto:300,400,400i,700&amp;display=fallback" rel="stylesheet"/>
  <style>
   body,
      input {
        font-family: "Roboto", "Helvetica Neue", Helvetica, Arial, sans-serif
      }

      code,
      kbd,
      pre {
        font-family: "Roboto Mono", "Courier New", Courier, monospace
      }
  </style>
  <link href="../_static/stylesheets/application.css" rel="stylesheet"/>
  <link href="../_static/stylesheets/application-palette.css" rel="stylesheet"/>
  <link href="../_static/stylesheets/application-fixes.css" rel="stylesheet"/>
  <link href="../_static/fonts/material-icons.css" rel="stylesheet"/>
  <meta content="84bd00" name="theme-color"/>
  <script src="../_static/javascripts/modernizr.js">
  </script>
  <title>
   Getting Started with C++ — Torch-TensorRT v1.1.0 documentation
  </title>
  <link href="../_static/pygments.css" rel="stylesheet" type="text/css"/>
  <link href="../_static/material.css" rel="stylesheet" type="text/css"/>
  <link href="../_static/collapsible-lists/css/tree_view.css" rel="stylesheet" type="text/css"/>
  <script data-url_root="../" id="documentation_options" src="../_static/documentation_options.js">
  </script>
  <script src="../_static/jquery.js">
  </script>
  <script src="../_static/underscore.js">
  </script>
  <script src="../_static/doctools.js">
  </script>
  <script src="../_static/collapsible-lists/js/CollapsibleLists.compressed.js">
  </script>
  <script src="../_static/collapsible-lists/js/apply-collapsible-lists.js">
  </script>
  <script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js">
  </script>
  <link href="../genindex.html" rel="index" title="Index"/>
  <link href="../search.html" rel="search" title="Search"/>
  <link href="getting_started_with_python_api.html" rel="next" title="Using Torch-TensorRT in Python"/>
  <link href="installation.html" rel="prev" title="Installation"/>
 </head>
 <body data-md-color-accent="light-green" data-md-color-primary="light-green" dir="ltr">
  <svg class="md-svg">
   <defs data-children-count="0">
    <svg height="448" id="__github" viewbox="0 0 416 448" width="416" xmlns="http://www.w3.org/2000/svg">
     <path d="M160 304q0 10-3.125 20.5t-10.75 19T128 352t-18.125-8.5-10.75-19T96 304t3.125-20.5 10.75-19T128 256t18.125 8.5 10.75 19T160 304zm160 0q0 10-3.125 20.5t-10.75 19T288 352t-18.125-8.5-10.75-19T256 304t3.125-20.5 10.75-19T288 256t18.125 8.5 10.75 19T320 304zm40 0q0-30-17.25-51T296 232q-10.25 0-48.75 5.25Q229.5 240 208 240t-39.25-2.75Q130.75 232 120 232q-29.5 0-46.75 21T56 304q0 22 8 38.375t20.25 25.75 30.5 15 35 7.375 37.25 1.75h42q20.5 0 37.25-1.75t35-7.375 30.5-15 20.25-25.75T360 304zm56-44q0 51.75-15.25 82.75-9.5 19.25-26.375 33.25t-35.25 21.5-42.5 11.875-42.875 5.5T212 416q-19.5 0-35.5-.75t-36.875-3.125-38.125-7.5-34.25-12.875T37 371.5t-21.5-28.75Q0 312 0 260q0-59.25 34-99-6.75-20.5-6.75-42.5 0-29 12.75-54.5 27 0 47.5 9.875t47.25 30.875Q171.5 96 212 96q37 0 70 8 26.25-20.5 46.75-30.25T376 64q12.75 25.5 12.75 54.5 0 21.75-6.75 42 34 40 34 99.5z" fill="currentColor">
     </path>
    </svg>
   </defs>
  </svg>
  <input class="md-toggle" data-md-toggle="drawer" id="__drawer" type="checkbox"/>
  <input class="md-toggle" data-md-toggle="search" id="__search" type="checkbox"/>
  <label class="md-overlay" data-md-component="overlay" for="__drawer">
  </label>
  <a class="md-skip" href="#tutorials-getting-started-with-cpp-api--page-root" tabindex="1">
   Skip to content
  </a>
  <header class="md-header" data-md-component="header">
   <nav class="md-header-nav md-grid">
    <div class="md-flex navheader">
     <div class="md-flex__cell md-flex__cell--shrink">
      <a class="md-header-nav__button md-logo" href="../index.html" title="Torch-TensorRT v1.1.0 documentation">
       <i class="md-icon">
        
       </i>
      </a>
     </div>
     <div class="md-flex__cell md-flex__cell--shrink">
      <label class="md-icon md-icon--menu md-header-nav__button" for="__drawer">
      </label>
     </div>
     <div class="md-flex__cell md-flex__cell--stretch">
      <div class="md-flex__ellipsis md-header-nav__title" data-md-component="title">
       <span class="md-header-nav__topic">
        Torch-TensorRT
       </span>
       <span class="md-header-nav__topic">
        Getting Started with C++
       </span>
      </div>
     </div>
     <div class="md-flex__cell md-flex__cell--shrink">
      <label class="md-icon md-icon--search md-header-nav__button" for="__search">
      </label>
      <div class="md-search" data-md-component="search" role="dialog">
       <label class="md-search__overlay" for="__search">
       </label>
       <div class="md-search__inner" role="search">
        <form action="../search.html" class="md-search__form" method="get" name="search">
         <input aria-label="Search" autocapitalize="off" autocomplete="off" class="md-search__input" data-md-component="query" data-md-state="active" name="q" placeholder="Search" spellcheck="false" type="text"/>
         <label class="md-icon md-search__icon" for="__search">
         </label>
         <button aria-label="Clear search" class="md-icon md-search__icon" data-md-component="reset" tabindex="-1" type="reset">
          
         </button>
        </form>
        <div class="md-search__output">
         <div class="md-search__scrollwrap" data-md-scrollfix="">
          <div class="md-search-result" data-md-component="result">
           <div class="md-search-result__meta">
            Type to start searching
           </div>
           <ol class="md-search-result__list">
           </ol>
          </div>
         </div>
        </div>
       </div>
      </div>
     </div>
     <div class="md-flex__cell md-flex__cell--shrink">
      <div class="md-header-nav__source">
       <a class="md-source" data-md-source="github" href="https://github.com/nvidia/Torch-TensorRT/" title="Go to repository">
        <div class="md-source__icon">
         <svg height="28" viewbox="0 0 24 24" width="28" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
          <use height="24" width="24" xlink:href="#__github">
          </use>
         </svg>
        </div>
        <div class="md-source__repository">
         Torch-TensorRT
        </div>
       </a>
      </div>
     </div>
     <div class="md-flex__cell md-flex__cell--shrink dropdown">
      <button class="dropdownbutton">
       Versions
      </button>
      <div class="dropdown-content md-hero">
       <a href="https://nvidia.github.io/Torch-TensorRT/" title="master">
        master
       </a>
       <a href="https://nvidia.github.io/Torch-TensorRT/v1.1.0/" title="v1.1.0">
        v1.1.0
       </a>
       <a href="https://nvidia.github.io/Torch-TensorRT/v1.0.0/" title="v1.0.0">
        v1.0.0
       </a>
       <a href="https://nvidia.github.io/Torch-TensorRT/v0.4.1/" title="v0.4.1">
        v0.4.1
       </a>
       <a href="https://nvidia.github.io/Torch-TensorRT/v0.4.0/" title="v0.4.0">
        v0.4.0
       </a>
       <a href="https://nvidia.github.io/Torch-TensorRT/v0.3.0/" title="v0.3.0">
        v0.3.0
       </a>
       <a href="https://nvidia.github.io/Torch-TensorRT/v0.2.0/" title="v0.2.0">
        v0.2.0
       </a>
       <a href="https://nvidia.github.io/Torch-TensorRT/v0.1.0/" title="v0.1.0">
        v0.1.0
       </a>
       <a href="https://nvidia.github.io/Torch-TensorRT/v0.0.3/" title="v0.0.3">
        v0.0.3
       </a>
       <a href="https://nvidia.github.io/Torch-TensorRT/v0.0.2/" title="v0.0.2">
        v0.0.2
       </a>
       <a href="https://nvidia.github.io/Torch-TensorRT/v0.0.1/" title="v0.0.1">
        v0.0.1
       </a>
      </div>
     </div>
    </div>
   </nav>
  </header>
  <div class="md-container">
   <nav class="md-tabs" data-md-component="tabs">
    <div class="md-tabs__inner md-grid">
     <ul class="md-tabs__list">
      <li class="md-tabs__item">
       <a class="md-tabs__link" href="../index.html">
        Torch-TensorRT v1.1.0 documentation
       </a>
      </li>
     </ul>
    </div>
   </nav>
   <main class="md-main">
    <div class="md-main__inner md-grid" data-md-component="container">
     <div class="md-sidebar md-sidebar--primary" data-md-component="navigation">
      <div class="md-sidebar__scrollwrap">
       <div class="md-sidebar__inner">
        <nav class="md-nav md-nav--primary" data-md-level="0">
         <label class="md-nav__title md-nav__title--site" for="__drawer">
          <a class="md-nav__button md-logo" href="../index.html" title="Torch-TensorRT v1.1.0 documentation">
           <i class="md-icon">
            
           </i>
          </a>
          <a href="../index.html" title="Torch-TensorRT v1.1.0 documentation">
           Torch-TensorRT
          </a>
         </label>
         <div class="md-nav__source">
          <a class="md-source" data-md-source="github" href="https://github.com/nvidia/Torch-TensorRT/" title="Go to repository">
           <div class="md-source__icon">
            <svg height="28" viewbox="0 0 24 24" width="28" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
             <use height="24" width="24" xlink:href="#__github">
             </use>
            </svg>
           </div>
           <div class="md-source__repository">
            Torch-TensorRT
           </div>
          </a>
         </div>
         <ul class="md-nav__list">
          <li class="md-nav__item">
           <span class="md-nav__link caption">
            <span class="caption-text">
             Getting Started
            </span>
           </span>
          </li>
          <li class="md-nav__item">
           <a class="md-nav__link" href="installation.html">
            Installation
           </a>
          </li>
          <li class="md-nav__item">
           <input class="md-toggle md-nav__toggle" data-md-toggle="toc" id="__toc" type="checkbox"/>
           <label class="md-nav__link md-nav__link--active" for="__toc">
            Getting Started with C++
           </label>
           <a class="md-nav__link md-nav__link--active" href="#">
            Getting Started with C++
           </a>
           <nav class="md-nav md-nav--secondary">
            <label class="md-nav__title" for="__toc">
             Contents
            </label>
            <ul class="md-nav__list" data-md-scrollfix="">
             <li class="md-nav__item">
              <a class="md-nav__link" href="#tutorials-getting-started-with-cpp-api--page-root">
               Getting Started with C++
              </a>
              <nav class="md-nav">
               <ul class="md-nav__list">
                <li class="md-nav__item">
                 <a class="md-nav__link" href="#using-torch-tensorrt-in-c">
                  Using Torch-TensorRT in C++
                 </a>
                 <nav class="md-nav">
                  <ul class="md-nav__list">
                   <li class="md-nav__item">
                    <a class="md-nav__link" href="#torch-tensorrt-quickstart-compiling-torchscript-modules-with-torchtrtc">
                     [Torch-TensorRT Quickstart] Compiling TorchScript Modules with
                     <code class="docutils literal notranslate">
                      <span class="pre">
                       torchtrtc
                      </span>
                     </code>
                    </a>
                   </li>
                   <li class="md-nav__item">
                    <a class="md-nav__link" href="#working-with-torchscript-in-c">
                     Working with TorchScript in C++
                    </a>
                   </li>
                   <li class="md-nav__item">
                    <a class="md-nav__link" href="#compiling-with-torch-tensorrt-in-c">
                     Compiling with Torch-TensorRT in C++
                    </a>
                   </li>
                   <li class="md-nav__item">
                    <a class="md-nav__link" href="#under-the-hood">
                     Under The Hood
                    </a>
                   </li>
                   <li class="md-nav__item">
                    <a class="md-nav__link" href="#working-with-unsupported-operators">
                     Working with Unsupported Operators
                    </a>
                    <nav class="md-nav">
                     <ul class="md-nav__list">
                      <li class="md-nav__item">
                       <a class="md-nav__link" href="#registering-custom-converters">
                        Registering Custom Converters
                       </a>
                      </li>
                     </ul>
                    </nav>
                   </li>
                  </ul>
                 </nav>
                </li>
               </ul>
              </nav>
             </li>
             <li class="md-nav__item">
              <a class="md-nav__extra_link" href="../_sources/tutorials/getting_started_with_cpp_api.rst.txt">
               Show Source
              </a>
             </li>
            </ul>
           </nav>
          </li>
          <li class="md-nav__item">
           <a class="md-nav__link" href="getting_started_with_python_api.html">
            Using Torch-TensorRT in Python
           </a>
          </li>
          <li class="md-nav__item">
           <a class="md-nav__link" href="creating_torchscript_module_in_python.html">
            Creating a TorchScript Module
           </a>
          </li>
          <li class="md-nav__item">
           <a class="md-nav__link" href="creating_torchscript_module_in_python.html#working-with-torchscript-in-python">
            Working with TorchScript in Python
           </a>
          </li>
          <li class="md-nav__item">
           <a class="md-nav__link" href="creating_torchscript_module_in_python.html#saving-torchscript-module-to-disk">
            Saving TorchScript Module to Disk
           </a>
          </li>
          <li class="md-nav__item">
           <a class="md-nav__link" href="ptq.html">
            Post Training Quantization (PTQ)
           </a>
          </li>
          <li class="md-nav__item">
           <a class="md-nav__link" href="torchtrtc.html">
            torchtrtc
           </a>
          </li>
          <li class="md-nav__item">
           <a class="md-nav__link" href="use_from_pytorch.html">
            Using Torch-TensorRT Directly From PyTorch
           </a>
          </li>
          <li class="md-nav__item">
           <a class="md-nav__link" href="runtime.html">
            Deploying Torch-TensorRT Programs
           </a>
          </li>
          <li class="md-nav__item">
           <a class="md-nav__link" href="using_dla.html">
            DLA
           </a>
          </li>
          <li class="md-nav__item">
           <span class="md-nav__link caption">
            <span class="caption-text">
             Notebooks
            </span>
           </span>
          </li>
          <li class="md-nav__item">
           <a class="md-nav__link" href="../_notebooks/CitriNet-example.html">
            Torch-TensorRT Getting Started - CitriNet
           </a>
          </li>
          <li class="md-nav__item">
           <a class="md-nav__link" href="../_notebooks/dynamic-shapes.html">
            Torch-TensorRT - Using Dynamic Shapes
           </a>
          </li>
          <li class="md-nav__item">
           <a class="md-nav__link" href="../_notebooks/EfficientNet-example.html">
            Torch-TensorRT Getting Started - EfficientNet-B0
           </a>
          </li>
          <li class="md-nav__item">
           <a class="md-nav__link" href="../_notebooks/Hugging-Face-BERT.html">
            Masked Language Modeling (MLM) with Hugging Face BERT Transformer
           </a>
          </li>
          <li class="md-nav__item">
           <a class="md-nav__link" href="../_notebooks/lenet-getting-started.html">
            Torch-TensorRT Getting Started - LeNet
           </a>
          </li>
          <li class="md-nav__item">
           <a class="md-nav__link" href="../_notebooks/Resnet50-example.html">
            Torch-TensorRT Getting Started - ResNet 50
           </a>
          </li>
          <li class="md-nav__item">
           <a class="md-nav__link" href="../_notebooks/ssd-object-detection-demo.html">
            Object Detection with Torch-TensorRT (SSD)
           </a>
          </li>
          <li class="md-nav__item">
           <a class="md-nav__link" href="../_notebooks/vgg-qat.html">
            Deploying Quantization Aware Trained models in INT8 using Torch-TensorRT
           </a>
          </li>
          <li class="md-nav__item">
           <span class="md-nav__link caption">
            <span class="caption-text">
             Python API Documentation
            </span>
           </span>
          </li>
          <li class="md-nav__item">
           <a class="md-nav__link" href="../py_api/torch_tensorrt.html">
            torch_tensorrt
           </a>
          </li>
          <li class="md-nav__item">
           <a class="md-nav__link" href="../py_api/logging.html">
            torch_tensorrt.logging
           </a>
          </li>
          <li class="md-nav__item">
           <a class="md-nav__link" href="../py_api/ptq.html">
            torch_tensorrt.ptq
           </a>
          </li>
          <li class="md-nav__item">
           <a class="md-nav__link" href="../py_api/ts.html">
            torch_tensorrt.ts
           </a>
          </li>
          <li class="md-nav__item">
           <span class="md-nav__link caption">
            <span class="caption-text">
             C++ API Documentation
            </span>
           </span>
          </li>
          <li class="md-nav__item">
           <a class="md-nav__link" href="../_cpp_api/torch_tensort_cpp.html">
            Torch-TensorRT C++ API
           </a>
          </li>
          <li class="md-nav__item">
           <a class="md-nav__link" href="../_cpp_api/namespace_torch_tensorrt.html">
            Namespace torch_tensorrt
           </a>
          </li>
          <li class="md-nav__item">
           <a class="md-nav__link" href="../_cpp_api/namespace_torch_tensorrt__logging.html">
            Namespace torch_tensorrt::logging
           </a>
          </li>
          <li class="md-nav__item">
           <a class="md-nav__link" href="../_cpp_api/namespace_torch_tensorrt__torchscript.html">
            Namespace torch_tensorrt::torchscript
           </a>
          </li>
          <li class="md-nav__item">
           <a class="md-nav__link" href="../_cpp_api/namespace_torch_tensorrt__ptq.html">
            Namespace torch_tensorrt::ptq
           </a>
          </li>
          <li class="md-nav__item">
           <span class="md-nav__link caption">
            <span class="caption-text">
             Contributor Documentation
            </span>
           </span>
          </li>
          <li class="md-nav__item">
           <a class="md-nav__link" href="../contributors/system_overview.html">
            System Overview
           </a>
          </li>
          <li class="md-nav__item">
           <a class="md-nav__link" href="../contributors/writing_converters.html">
            Writing Converters
           </a>
          </li>
          <li class="md-nav__item">
           <a class="md-nav__link" href="../contributors/useful_links.html">
            Useful Links for Torch-TensorRT Development
           </a>
          </li>
          <li class="md-nav__item">
           <span class="md-nav__link caption">
            <span class="caption-text">
             Indices
            </span>
           </span>
          </li>
          <li class="md-nav__item">
           <a class="md-nav__link" href="../indices/supported_ops.html">
            Operators Supported
           </a>
          </li>
         </ul>
        </nav>
       </div>
      </div>
     </div>
     <div class="md-sidebar md-sidebar--secondary" data-md-component="toc">
      <div class="md-sidebar__scrollwrap">
       <div class="md-sidebar__inner">
        <nav class="md-nav md-nav--secondary">
         <label class="md-nav__title" for="__toc">
          Contents
         </label>
         <ul class="md-nav__list" data-md-scrollfix="">
          <li class="md-nav__item">
           <a class="md-nav__link" href="#tutorials-getting-started-with-cpp-api--page-root">
            Getting Started with C++
           </a>
           <nav class="md-nav">
            <ul class="md-nav__list">
             <li class="md-nav__item">
              <a class="md-nav__link" href="#using-torch-tensorrt-in-c">
               Using Torch-TensorRT in C++
              </a>
              <nav class="md-nav">
               <ul class="md-nav__list">
                <li class="md-nav__item">
                 <a class="md-nav__link" href="#torch-tensorrt-quickstart-compiling-torchscript-modules-with-torchtrtc">
                  [Torch-TensorRT Quickstart] Compiling TorchScript Modules with
                  <code class="docutils literal notranslate">
                   <span class="pre">
                    torchtrtc
                   </span>
                  </code>
                 </a>
                </li>
                <li class="md-nav__item">
                 <a class="md-nav__link" href="#working-with-torchscript-in-c">
                  Working with TorchScript in C++
                 </a>
                </li>
                <li class="md-nav__item">
                 <a class="md-nav__link" href="#compiling-with-torch-tensorrt-in-c">
                  Compiling with Torch-TensorRT in C++
                 </a>
                </li>
                <li class="md-nav__item">
                 <a class="md-nav__link" href="#under-the-hood">
                  Under The Hood
                 </a>
                </li>
                <li class="md-nav__item">
                 <a class="md-nav__link" href="#working-with-unsupported-operators">
                  Working with Unsupported Operators
                 </a>
                 <nav class="md-nav">
                  <ul class="md-nav__list">
                   <li class="md-nav__item">
                    <a class="md-nav__link" href="#registering-custom-converters">
                     Registering Custom Converters
                    </a>
                   </li>
                  </ul>
                 </nav>
                </li>
               </ul>
              </nav>
             </li>
            </ul>
           </nav>
          </li>
          <li class="md-nav__item">
           <a class="md-nav__extra_link" href="../_sources/tutorials/getting_started_with_cpp_api.rst.txt">
            Show Source
           </a>
          </li>
          <li class="md-nav__item" id="searchbox">
          </li>
         </ul>
        </nav>
       </div>
      </div>
     </div>
     <div class="md-content">
      <article class="md-content__inner md-typeset" role="main">
       <section id="getting-started-with-c">
        <span id="getting-started">
        </span>
        <h1 id="tutorials-getting-started-with-cpp-api--page-root">
         Getting Started with C++
         <a class="headerlink" href="#tutorials-getting-started-with-cpp-api--page-root" title="Permalink to this headline">
          ¶
         </a>
        </h1>
        <p>
         If you haven’t already, acquire a tarball of the library by following the instructions in
         <a class="reference internal" href="installation.html#installation">
          <span class="std std-ref">
           Installation
          </span>
         </a>
        </p>
        <section>
         <h2 id="using-torch-tensorrt-in-c">
          Using Torch-TensorRT in C++
          <a class="headerlink" href="#using-torch-tensorrt-in-c" title="Permalink to this headline">
           ¶
          </a>
         </h2>
         <p>
          Torch-TensorRT C++ API accepts TorchScript modules (generated either from
          <code class="docutils literal notranslate">
           <span class="pre">
            torch.jit.script
           </span>
          </code>
          or
          <code class="docutils literal notranslate">
           <span class="pre">
            torch.jit.trace
           </span>
          </code>
          ) as an input and returns
a TorchScript module (optimized using TensorRT). This requires users to use PyTorch (in Python) to generate TorchScript modules beforehand.
Please refer to the
          <a class="reference external" href="https://nvidia.github.io/Torch-TensorRT/tutorials/creating_torchscript_module_in_python.html">
           Creating TorchScript modules in Python
          </a>
          section to generate TorchScript graphs.
         </p>
         <section>
          <span id="torch-tensorrt-quickstart">
          </span>
          <h3 id="torch-tensorrt-quickstart-compiling-torchscript-modules-with-torchtrtc">
           [Torch-TensorRT Quickstart] Compiling TorchScript Modules with
           <code class="docutils literal notranslate">
            <span class="pre">
             torchtrtc
            </span>
           </code>
           <a class="headerlink" href="#torch-tensorrt-quickstart-compiling-torchscript-modules-with-torchtrtc" title="Permalink to this headline">
            ¶
           </a>
          </h3>
          <p>
           An easy way to get started with Torch-TensorRT and to check if your model can be supported without extra work is to run it through
           <code class="docutils literal notranslate">
            <span class="pre">
             torchtrtc
            </span>
           </code>
           , which supports almost all features of the compiler from the command line including post training quantization
(given a previously created calibration cache). For example, we can compile our LeNet model by setting our preferred operating
precision and input size. This new TorchScript file can be loaded into Python (note: you need to
           <code class="docutils literal notranslate">
            <span class="pre">
             import
            </span>
            <span class="pre">
             torch_tensorrt
            </span>
           </code>
           before loading
these compiled modules because the compiler extends the PyTorch deserializer and runtime to execute compiled modules).
          </p>
          <div class="highlight-shell notranslate">
           <div class="highlight">
            <pre><span></span>❯ torchtrtc -p f16 lenet_scripted.ts trt_lenet_scripted.ts <span class="s2">"(1,1,32,32)"</span>

❯ python3
Python <span class="m">3</span>.6.9 <span class="o">(</span>default, Apr <span class="m">18</span> <span class="m">2020</span>, <span class="m">01</span>:56:04<span class="o">)</span>
<span class="o">[</span>GCC <span class="m">8</span>.4.0<span class="o">]</span> on linux
Type <span class="s2">"help"</span>, <span class="s2">"copyright"</span>, <span class="s2">"credits"</span> or <span class="s2">"license"</span> <span class="k">for</span> more information.
&gt;&gt;&gt; import torch
&gt;&gt;&gt; import torch_tensorrt
&gt;&gt;&gt; <span class="nv">ts_model</span> <span class="o">=</span> torch.jit.load<span class="o">(</span>“trt_lenet_scripted.ts”<span class="o">)</span>
&gt;&gt;&gt; ts_model<span class="o">(</span>torch.randn<span class="o">((</span><span class="m">1</span>,1,32,32<span class="o">))</span>.to<span class="o">(</span>“cuda”<span class="o">)</span>.half<span class="o">())</span>
</pre>
           </div>
          </div>
          <p>
           You can learn more about
           <code class="docutils literal notranslate">
            <span class="pre">
             torchtrtc
            </span>
           </code>
           usage here:
           <a class="reference internal" href="torchtrtc.html#torchtrtc">
            <span class="std std-ref">
             torchtrtc
            </span>
           </a>
          </p>
         </section>
         <section>
          <span id="ts-in-cc">
          </span>
          <h3 id="working-with-torchscript-in-c">
           Working with TorchScript in C++
           <a class="headerlink" href="#working-with-torchscript-in-c" title="Permalink to this headline">
            ¶
           </a>
          </h3>
          <p>
           If we are developing an application to deploy with C++, we can save either our traced or scripted module using
           <code class="docutils literal notranslate">
            <span class="pre">
             torch.jit.save
            </span>
           </code>
           which will serialize the TorchScript code, weights and other information into a package. This is also where our dependency on Python ends.
          </p>
          <div class="highlight-python notranslate">
           <div class="highlight">
            <pre><span></span><span class="n">torch_script_module</span><span class="o">.</span><span class="n">save</span><span class="p">(</span><span class="s2">"lenet.jit.pt"</span><span class="p">)</span>
</pre>
           </div>
          </div>
          <p>
           From here we can now load our TorchScript module in C++
          </p>
          <div class="highlight-c++ notranslate">
           <div class="highlight">
            <pre><span></span><span class="cp">#include</span><span class="w"> </span><span class="cpf">&lt;torch/script.h&gt;</span><span class="c1"> // One-stop header.</span><span class="cp"></span>

<span class="cp">#include</span><span class="w"> </span><span class="cpf">&lt;iostream&gt;</span><span class="cp"></span>
<span class="cp">#include</span><span class="w"> </span><span class="cpf">&lt;memory&gt;</span><span class="cp"></span>

<span class="kt">int</span><span class="w"> </span><span class="nf">main</span><span class="p">(</span><span class="kt">int</span><span class="w"> </span><span class="n">argc</span><span class="p">,</span><span class="w"> </span><span class="k">const</span><span class="w"> </span><span class="kt">char</span><span class="o">*</span><span class="w"> </span><span class="n">argv</span><span class="p">[])</span><span class="w"> </span><span class="p">{</span><span class="w"></span>
<span class="w">    </span><span class="n">torch</span><span class="o">::</span><span class="n">jit</span><span class="o">::</span><span class="n">Module</span><span class="w"> </span><span class="k">module</span><span class="p">;</span><span class="w"></span>
<span class="w">    </span><span class="k">try</span><span class="w"> </span><span class="p">{</span><span class="w"></span>
<span class="w">        </span><span class="c1">// Deserialize the ScriptModule from a file using torch::jit::load().</span>
<span class="w">        </span><span class="k">module</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">torch</span><span class="o">::</span><span class="n">jit</span><span class="o">::</span><span class="n">load</span><span class="p">(</span><span class="s">"&lt;PATH TO SAVED TS MOD&gt;"</span><span class="p">);</span><span class="w"></span>
<span class="w">    </span><span class="p">}</span><span class="w"></span>
<span class="w">    </span><span class="k">catch</span><span class="w"> </span><span class="p">(</span><span class="k">const</span><span class="w"> </span><span class="n">c10</span><span class="o">::</span><span class="n">Error</span><span class="o">&amp;</span><span class="w"> </span><span class="n">e</span><span class="p">)</span><span class="w"> </span><span class="p">{</span><span class="w"></span>
<span class="w">        </span><span class="n">std</span><span class="o">::</span><span class="n">cerr</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="s">"error loading the model</span><span class="se">\n</span><span class="s">"</span><span class="p">;</span><span class="w"></span>
<span class="w">        </span><span class="k">return</span><span class="w"> </span><span class="mi">-1</span><span class="p">;</span><span class="w"></span>
<span class="w">    </span><span class="p">}</span><span class="w"></span>

<span class="w">    </span><span class="n">std</span><span class="o">::</span><span class="n">cout</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="s">"ok</span><span class="se">\n</span><span class="s">"</span><span class="p">;</span><span class="w"></span>
<span class="p">}</span><span class="w"></span>
</pre>
           </div>
          </div>
          <p>
           You can do full training and inference in C++ with PyTorch / LibTorch if you would like; you can even define your modules in C++ and
have access to the same powerful tensor library that backs PyTorch. (For more information:
           <a class="reference external" href="https://pytorch.org/cppdocs/">
            https://pytorch.org/cppdocs/
           </a>
           ).
For instance we can do inference with our LeNet module like this:
          </p>
          <div class="highlight-c++ notranslate">
           <div class="highlight">
            <pre><span></span><span class="n">mod</span><span class="p">.</span><span class="n">eval</span><span class="p">();</span><span class="w"></span>
<span class="n">torch</span><span class="o">::</span><span class="n">Tensor</span><span class="w"> </span><span class="n">in</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">torch</span><span class="o">::</span><span class="n">randn</span><span class="p">({</span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">32</span><span class="p">,</span><span class="w"> </span><span class="mi">32</span><span class="p">});</span><span class="w"></span>
<span class="k">auto</span><span class="w"> </span><span class="n">out</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">mod</span><span class="p">.</span><span class="n">forward</span><span class="p">(</span><span class="n">in</span><span class="p">);</span><span class="w"></span>
</pre>
           </div>
          </div>
          <p>
           and to run on the GPU:
          </p>
          <div class="highlight-c++ notranslate">
           <div class="highlight">
            <pre><span></span><span class="n">mod</span><span class="p">.</span><span class="n">eval</span><span class="p">();</span><span class="w"></span>
<span class="n">mod</span><span class="p">.</span><span class="n">to</span><span class="p">(</span><span class="n">torch</span><span class="o">::</span><span class="n">kCUDA</span><span class="p">);</span><span class="w"></span>
<span class="n">torch</span><span class="o">::</span><span class="n">Tensor</span><span class="w"> </span><span class="n">in</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">torch</span><span class="o">::</span><span class="n">randn</span><span class="p">({</span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">32</span><span class="p">,</span><span class="w"> </span><span class="mi">32</span><span class="p">},</span><span class="w"> </span><span class="n">torch</span><span class="o">::</span><span class="n">kCUDA</span><span class="p">);</span><span class="w"></span>
<span class="k">auto</span><span class="w"> </span><span class="n">out</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">mod</span><span class="p">.</span><span class="n">forward</span><span class="p">(</span><span class="n">in</span><span class="p">);</span><span class="w"></span>
</pre>
           </div>
          </div>
          <p>
           As you can see it is pretty similar to the Python API. When you call the
           <code class="docutils literal notranslate">
            <span class="pre">
             forward
            </span>
           </code>
           method, you invoke the PyTorch JIT compiler, which will optimize and run your TorchScript code.
          </p>
         </section>
         <section id="compiling-with-torch-tensorrt-in-c">
          <span id="compile-cpp">
          </span>
          <h3 id="compiling-with-torch-tensorrt-in-c">
           Compiling with Torch-TensorRT in C++
           <a class="headerlink" href="#compiling-with-torch-tensorrt-in-c" title="Permalink to this headline">
            ¶
           </a>
          </h3>
          <p>
           We are also at the point where we can compile and optimize our module with Torch-TensorRT, but instead of in a JIT fashion we must do it ahead-of-time (AOT), i.e. before we start doing actual inference work.
Since it takes a bit of time to optimize the module, it would not make sense to do this every time you run the module or even the first time you run it.
          </p>
          <p>
           With our module loaded, we can feed it into the Torch-TensorRT compiler. When we do so we must provide some information on the expected input size and also configure any additional settings.
          </p>
          <div class="highlight-c++ notranslate">
           <div class="highlight">
            <pre><span></span><span class="cp">#include</span><span class="w"> </span><span class="cpf">"torch/script.h"</span><span class="cp"></span>
<span class="cp">#include</span><span class="w"> </span><span class="cpf">"torch_tensorrt/torch_tensorrt.h"</span><span class="cp"></span>
<span class="p">...</span><span class="w"></span>

<span class="w">    </span><span class="n">mod</span><span class="p">.</span><span class="n">to</span><span class="p">(</span><span class="n">at</span><span class="o">::</span><span class="n">kCUDA</span><span class="p">);</span><span class="w"></span>
<span class="w">    </span><span class="n">mod</span><span class="p">.</span><span class="n">eval</span><span class="p">();</span><span class="w"></span>

<span class="w">    </span><span class="k">auto</span><span class="w"> </span><span class="n">in</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">torch</span><span class="o">::</span><span class="n">randn</span><span class="p">({</span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">32</span><span class="p">,</span><span class="w"> </span><span class="mi">32</span><span class="p">},</span><span class="w"> </span><span class="p">{</span><span class="n">torch</span><span class="o">::</span><span class="n">kCUDA</span><span class="p">});</span><span class="w"></span>
<span class="w">    </span><span class="k">auto</span><span class="w"> </span><span class="n">trt_mod</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">torch_tensorrt</span><span class="o">::</span><span class="n">CompileGraph</span><span class="p">(</span><span class="n">mod</span><span class="p">,</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">vector</span><span class="o">&lt;</span><span class="n">torch_tensorrt</span><span class="o">::</span><span class="n">CompileSpec</span><span class="o">::</span><span class="n">InputRange</span><span class="o">&gt;</span><span class="p">{{</span><span class="n">in</span><span class="p">.</span><span class="n">sizes</span><span class="p">()}});</span><span class="w"></span>
<span class="w">    </span><span class="k">auto</span><span class="w"> </span><span class="n">out</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">trt_mod</span><span class="p">.</span><span class="n">forward</span><span class="p">({</span><span class="n">in</span><span class="p">});</span><span class="w"></span>
</pre>
           </div>
          </div>
          <p>
           That's it! Now the graph runs primarily not with the JIT compiler but using TensorRT (though we execute the graph using the JIT runtime).
          </p>
          <p>
           We can also set settings like operating precision to run in FP16.
          </p>
          <div class="highlight-c++ notranslate">
           <div class="highlight">
            <pre><span></span><span class="cp">#include</span><span class="w"> </span><span class="cpf">"torch/script.h"</span><span class="cp"></span>
<span class="cp">#include</span><span class="w"> </span><span class="cpf">"torch_tensorrt/torch_tensorrt.h"</span><span class="cp"></span>
<span class="p">...</span><span class="w"></span>

<span class="w">    </span><span class="n">mod</span><span class="p">.</span><span class="n">to</span><span class="p">(</span><span class="n">at</span><span class="o">::</span><span class="n">kCUDA</span><span class="p">);</span><span class="w"></span>
<span class="w">    </span><span class="n">mod</span><span class="p">.</span><span class="n">eval</span><span class="p">();</span><span class="w"></span>

<span class="w">    </span><span class="k">auto</span><span class="w"> </span><span class="n">in</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">torch</span><span class="o">::</span><span class="n">randn</span><span class="p">({</span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">32</span><span class="p">,</span><span class="w"> </span><span class="mi">32</span><span class="p">},</span><span class="w"> </span><span class="p">{</span><span class="n">torch</span><span class="o">::</span><span class="n">kCUDA</span><span class="p">}).</span><span class="n">to</span><span class="p">(</span><span class="n">torch</span><span class="o">::</span><span class="n">kHALF</span><span class="p">);</span><span class="w"></span>
<span class="w">    </span><span class="k">auto</span><span class="w"> </span><span class="n">input_sizes</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">vector</span><span class="o">&lt;</span><span class="n">torch_tensorrt</span><span class="o">::</span><span class="n">CompileSpec</span><span class="o">::</span><span class="n">InputRange</span><span class="o">&gt;</span><span class="p">({</span><span class="n">in</span><span class="p">.</span><span class="n">sizes</span><span class="p">()});</span><span class="w"></span>
<span class="w">    </span><span class="n">torch_tensorrt</span><span class="o">::</span><span class="n">CompileSpec</span><span class="w"> </span><span class="nf">info</span><span class="p">(</span><span class="n">input_sizes</span><span class="p">);</span><span class="w"></span>
<span class="w">    </span><span class="n">info</span><span class="p">.</span><span class="n">enabled_precisions</span><span class="p">.</span><span class="n">insert</span><span class="p">(</span><span class="n">torch</span><span class="o">::</span><span class="n">kHALF</span><span class="p">);</span><span class="w"></span>
<span class="w">    </span><span class="k">auto</span><span class="w"> </span><span class="n">trt_mod</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">torch_tensorrt</span><span class="o">::</span><span class="n">CompileGraph</span><span class="p">(</span><span class="n">mod</span><span class="p">,</span><span class="w"> </span><span class="n">info</span><span class="p">);</span><span class="w"></span>
<span class="w">    </span><span class="k">auto</span><span class="w"> </span><span class="n">out</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">trt_mod</span><span class="p">.</span><span class="n">forward</span><span class="p">({</span><span class="n">in</span><span class="p">});</span><span class="w"></span>
</pre>
           </div>
          </div>
          <p>
           And now we are running the module in FP16 precision. You can then save the module to load later.
          </p>
          <div class="highlight-c++ notranslate">
           <div class="highlight">
            <pre><span></span><span class="n">trt_mod</span><span class="p">.</span><span class="n">save</span><span class="p">(</span><span class="s">"&lt;PATH TO SAVED TRT/TS MOD&gt;"</span><span class="p">);</span><span class="w"></span>
</pre>
           </div>
          </div>
          <p>
           Torch-TensorRT compiled TorchScript modules are loaded in the same way as normal TorchScript modules. Make sure your deployment application is linked against
           <code class="docutils literal notranslate">
            <span class="pre">
             libtorchtrt.so
            </span>
           </code>
          </p>
          <div class="highlight-c++ notranslate">
           <div class="highlight">
            <pre><span></span><span class="cp">#include</span><span class="w"> </span><span class="cpf">"torch/script.h"</span><span class="cp"></span>
<span class="cp">#include</span><span class="w"> </span><span class="cpf">"torch_tensorrt/torch_tensorrt.h"</span><span class="cp"></span>

<span class="kt">int</span><span class="w"> </span><span class="nf">main</span><span class="p">(</span><span class="kt">int</span><span class="w"> </span><span class="n">argc</span><span class="p">,</span><span class="w"> </span><span class="k">const</span><span class="w"> </span><span class="kt">char</span><span class="o">*</span><span class="w"> </span><span class="n">argv</span><span class="p">[])</span><span class="w"> </span><span class="p">{</span><span class="w"></span>
<span class="w">    </span><span class="n">torch</span><span class="o">::</span><span class="n">jit</span><span class="o">::</span><span class="n">Module</span><span class="w"> </span><span class="k">module</span><span class="p">;</span><span class="w"></span>
<span class="w">    </span><span class="k">try</span><span class="w"> </span><span class="p">{</span><span class="w"></span>
<span class="w">        </span><span class="c1">// Deserialize the ScriptModule from a file using torch::jit::load().</span>
<span class="w">        </span><span class="k">module</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">torch</span><span class="o">::</span><span class="n">jit</span><span class="o">::</span><span class="n">load</span><span class="p">(</span><span class="s">"&lt;PATH TO SAVED TRT/TS MOD&gt;"</span><span class="p">);</span><span class="w"></span>
<span class="w">    </span><span class="p">}</span><span class="w"></span>
<span class="w">    </span><span class="k">catch</span><span class="w"> </span><span class="p">(</span><span class="k">const</span><span class="w"> </span><span class="n">c10</span><span class="o">::</span><span class="n">Error</span><span class="o">&amp;</span><span class="w"> </span><span class="n">e</span><span class="p">)</span><span class="w"> </span><span class="p">{</span><span class="w"></span>
<span class="w">        </span><span class="n">std</span><span class="o">::</span><span class="n">cerr</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="s">"error loading the model</span><span class="se">\n</span><span class="s">"</span><span class="p">;</span><span class="w"></span>
<span class="w">        </span><span class="k">return</span><span class="w"> </span><span class="mi">-1</span><span class="p">;</span><span class="w"></span>
<span class="w">    </span><span class="p">}</span><span class="w"></span>

<span class="w">    </span><span class="n">torch</span><span class="o">::</span><span class="n">Tensor</span><span class="w"> </span><span class="n">in</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">torch</span><span class="o">::</span><span class="n">randn</span><span class="p">({</span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">32</span><span class="p">,</span><span class="w"> </span><span class="mi">32</span><span class="p">},</span><span class="w"> </span><span class="n">torch</span><span class="o">::</span><span class="n">kCUDA</span><span class="p">);</span><span class="w"></span>
<span class="w">    </span><span class="k">auto</span><span class="w"> </span><span class="n">out</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">module</span><span class="p">.</span><span class="n">forward</span><span class="p">(</span><span class="n">in</span><span class="p">);</span><span class="w"></span>

<span class="w">    </span><span class="n">std</span><span class="o">::</span><span class="n">cout</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="s">"ok</span><span class="se">\n</span><span class="s">"</span><span class="p">;</span><span class="w"></span>
<span class="p">}</span><span class="w"></span>
</pre>
           </div>
          </div>
          <p>
           If you want to save the engine produced by Torch-TensorRT to use in a TensorRT application you can use the
           <code class="docutils literal notranslate">
            <span class="pre">
             ConvertGraphToTRTEngine
            </span>
           </code>
           API.
          </p>
          <div class="highlight-c++ notranslate">
           <div class="highlight">
            <pre><span></span><span class="cp">#include</span><span class="w"> </span><span class="cpf">"torch/script.h"</span><span class="cp"></span>
<span class="cp">#include</span><span class="w"> </span><span class="cpf">"torch_tensorrt/torch_tensorrt.h"</span><span class="cp"></span>
<span class="p">...</span><span class="w"></span>

<span class="w">    </span><span class="n">mod</span><span class="p">.</span><span class="n">to</span><span class="p">(</span><span class="n">at</span><span class="o">::</span><span class="n">kCUDA</span><span class="p">);</span><span class="w"></span>
<span class="w">    </span><span class="n">mod</span><span class="p">.</span><span class="n">eval</span><span class="p">();</span><span class="w"></span>

<span class="w">    </span><span class="k">auto</span><span class="w"> </span><span class="n">in</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">torch</span><span class="o">::</span><span class="n">randn</span><span class="p">({</span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">32</span><span class="p">,</span><span class="w"> </span><span class="mi">32</span><span class="p">},</span><span class="w"> </span><span class="p">{</span><span class="n">torch</span><span class="o">::</span><span class="n">kCUDA</span><span class="p">}).</span><span class="n">to</span><span class="p">(</span><span class="n">torch</span><span class="o">::</span><span class="n">kHALF</span><span class="p">);</span><span class="w"></span>
<span class="w">    </span><span class="k">auto</span><span class="w"> </span><span class="n">input_sizes</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">vector</span><span class="o">&lt;</span><span class="n">torch_tensorrt</span><span class="o">::</span><span class="n">CompileSpec</span><span class="o">::</span><span class="n">InputRange</span><span class="o">&gt;</span><span class="p">({</span><span class="n">in</span><span class="p">.</span><span class="n">sizes</span><span class="p">()});</span><span class="w"></span>
<span class="w">    </span><span class="n">torch_tensorrt</span><span class="o">::</span><span class="n">CompileSpec</span><span class="w"> </span><span class="nf">info</span><span class="p">(</span><span class="n">input_sizes</span><span class="p">);</span><span class="w"></span>
<span class="w">    </span><span class="n">info</span><span class="p">.</span><span class="n">enabled_precisions</span><span class="p">.</span><span class="n">insert</span><span class="p">(</span><span class="n">torch</span><span class="o">::</span><span class="n">kHALF</span><span class="p">);</span><span class="w"></span>
<span class="w">    </span><span class="k">auto</span><span class="w"> </span><span class="n">engine</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">torch_tensorrt</span><span class="o">::</span><span class="n">ConvertGraphToTRTEngine</span><span class="p">(</span><span class="n">mod</span><span class="p">,</span><span class="w"> </span><span class="s">"forward"</span><span class="p">,</span><span class="w"> </span><span class="n">info</span><span class="p">);</span><span class="w"></span>
<span class="w">    </span><span class="n">std</span><span class="o">::</span><span class="n">ofstream</span><span class="w"> </span><span class="nf">out</span><span class="p">(</span><span class="s">"/tmp/engine_converted_from_jit.trt"</span><span class="p">);</span><span class="w"></span>
<span class="w">    </span><span class="n">out</span><span class="w"> </span><span class="o">&lt;&lt;</span><span class="w"> </span><span class="n">engine</span><span class="p">;</span><span class="w"></span>
<span class="w">    </span><span class="n">out</span><span class="p">.</span><span class="n">close</span><span class="p">();</span><span class="w"></span>
</pre>
           </div>
          </div>
         </section>
         <section id="under-the-hood">
          <span id="id1">
          </span>
          <h3 id="under-the-hood">
           Under The Hood
           <a class="headerlink" href="#under-the-hood" title="Permalink to this headline">
            ¶
           </a>
          </h3>
          <p>
           When a module is provided to Torch-TensorRT, the compiler starts by mapping a graph like you saw above to a graph like this:
          </p>
          <div class="highlight-none notranslate">
           <div class="highlight">
            <pre><span></span>graph(%input.2 : Tensor):
    %2 : Float(84, 10) = prim::Constant[value=&lt;Tensor&gt;]()
    %3 : Float(120, 84) = prim::Constant[value=&lt;Tensor&gt;]()
    %4 : Float(576, 120) = prim::Constant[value=&lt;Tensor&gt;]()
    %5 : int = prim::Constant[value=-1]() # x.py:25:0
    %6 : int[] = prim::Constant[value=annotate(List[int], [])]()
    %7 : int[] = prim::Constant[value=[2, 2]]()
    %8 : int[] = prim::Constant[value=[0, 0]]()
    %9 : int[] = prim::Constant[value=[1, 1]]()
    %10 : bool = prim::Constant[value=1]() # ~/.local/lib/python3.6/site-packages/torch/nn/modules/conv.py:346:0
    %11 : int = prim::Constant[value=1]() # ~/.local/lib/python3.6/site-packages/torch/nn/functional.py:539:0
    %12 : bool = prim::Constant[value=0]() # ~/.local/lib/python3.6/site-packages/torch/nn/functional.py:539:0
    %self.classifer.fc3.bias : Float(10) = prim::Constant[value= 0.0464  0.0383  0.0678  0.0932  0.1045 -0.0805 -0.0435 -0.0818  0.0208 -0.0358 [ CUDAFloatType{10} ]]()
    %self.classifer.fc2.bias : Float(84) = prim::Constant[value=&lt;Tensor&gt;]()
    %self.classifer.fc1.bias : Float(120) = prim::Constant[value=&lt;Tensor&gt;]()
    %self.feat.conv2.weight : Float(16, 6, 3, 3) = prim::Constant[value=&lt;Tensor&gt;]()
    %self.feat.conv2.bias : Float(16) = prim::Constant[value=&lt;Tensor&gt;]()
    %self.feat.conv1.weight : Float(6, 1, 3, 3) = prim::Constant[value=&lt;Tensor&gt;]()
    %self.feat.conv1.bias : Float(6) = prim::Constant[value= 0.0530 -0.1691  0.2802  0.1502  0.1056 -0.1549 [ CUDAFloatType{6} ]]()
    %input0.4 : Tensor = aten::_convolution(%input.2, %self.feat.conv1.weight, %self.feat.conv1.bias, %9, %8, %9, %12, %8, %11, %12, %12, %10) # ~/.local/lib/python3.6/site-packages/torch/nn/modules/conv.py:346:0
    %input0.5 : Tensor = aten::relu(%input0.4) # ~/.local/lib/python3.6/site-packages/torch/nn/functional.py:1063:0
    %input1.2 : Tensor = aten::max_pool2d(%input0.5, %7, %6, %8, %9, %12) # ~/.local/lib/python3.6/site-packages/torch/nn/functional.py:539:0
    %input0.6 : Tensor = aten::_convolution(%input1.2, %self.feat.conv2.weight, %self.feat.conv2.bias, %9, %8, %9, %12, %8, %11, %12, %12, %10) # ~/.local/lib/python3.6/site-packages/torch/nn/modules/conv.py:346:0
    %input2.1 : Tensor = aten::relu(%input0.6) # ~/.local/lib/python3.6/site-packages/torch/nn/functional.py:1063:0
    %x.1 : Tensor = aten::max_pool2d(%input2.1, %7, %6, %8, %9, %12) # ~/.local/lib/python3.6/site-packages/torch/nn/functional.py:539:0
    %input.1 : Tensor = aten::flatten(%x.1, %11, %5) # x.py:25:0
    %27 : Tensor = aten::matmul(%input.1, %4)
    %28 : Tensor = trt::const(%self.classifer.fc1.bias)
    %29 : Tensor = aten::add_(%28, %27, %11)
    %input0.2 : Tensor = aten::relu(%29) # ~/.local/lib/python3.6/site-packages/torch/nn/functional.py:1063:0
    %31 : Tensor = aten::matmul(%input0.2, %3)
    %32 : Tensor = trt::const(%self.classifer.fc2.bias)
    %33 : Tensor = aten::add_(%32, %31, %11)
    %input1.1 : Tensor = aten::relu(%33) # ~/.local/lib/python3.6/site-packages/torch/nn/functional.py:1063:0
    %35 : Tensor = aten::matmul(%input1.1, %2)
    %36 : Tensor = trt::const(%self.classifer.fc3.bias)
    %37 : Tensor = aten::add_(%36, %35, %11)
    return (%37)
(CompileGraph)
</pre>
           </div>
          </div>
          <p>
           The graph has now been transformed from a collection of modules, each managing their own parameters into a single graph with the parameters inlined
into the graph and all of the operations laid out. Torch-TensorRT has also executed a number of optimizations and mappings to make the graph easier to translate to TensorRT.
From here the compiler can assemble the TensorRT engine by following the dataflow through the graph.
          </p>
          <p>
           When the conversion phase is complete, Torch-TensorRT produces a serialized TensorRT engine. From here, depending on the API, this engine is returned
to the user or moves into the graph construction phase. Here Torch-TensorRT creates a JIT Module to execute the TensorRT engine which will be instantiated and managed
by the Torch-TensorRT runtime.
          </p>
          <p>
           Here is the graph that you get back after compilation is complete:
          </p>
          <div class="highlight-none notranslate">
           <div class="highlight">
            <pre><span></span>graph(%self_1 : __torch__.lenet, %input_0 : Tensor):
    %1 : ...trt.Engine = prim::GetAttr[name="lenet"](%self_1)
    %3 : Tensor[] = prim::ListConstruct(%input_0)
    %4 : Tensor[] = trt::execute_engine(%3, %1)
    %5 : Tensor = prim::ListUnpack(%4)
    return (%5)
</pre>
           </div>
          </div>
          <p>
           You can see the call where the engine is executed, after extracting the attribute containing the engine and constructing a list of inputs; the graph then returns the resulting tensors back to the user.
          </p>
         </section>
         <section id="working-with-unsupported-operators">
          <span id="unsupported-ops">
          </span>
          <h3 id="working-with-unsupported-operators">
           Working with Unsupported Operators
           <a class="headerlink" href="#working-with-unsupported-operators" title="Permalink to this headline">
            ¶
           </a>
          </h3>
          <p>
           Torch-TensorRT is a new library and the PyTorch operator library is quite large, so there will be ops that aren’t supported natively by the compiler. You can either use the composition techniques
shown above to separate modules that are fully Torch-TensorRT supported from ones that are not and stitch the modules together in the deployment application, or you can register converters for missing ops.
          </p>
          <blockquote>
           <div>
            <p>
             You can check support without going through the full compilation pipeline using the
             <code class="docutils literal notranslate">
              <span class="pre">
               torch_tensorrt::CheckMethodOperatorSupport(const
              </span>
              <span class="pre">
               torch::jit::Module&amp;
              </span>
              <span class="pre">
               module,
              </span>
              <span class="pre">
               std::string
              </span>
              <span class="pre">
               method_name)
              </span>
             </code>
             API
to see what operators are not supported.
             <code class="docutils literal notranslate">
              <span class="pre">
               torchtrtc
              </span>
             </code>
             automatically checks modules with this method before starting compilation and will print out a list of operators that are not supported.
            </p>
           </div>
          </blockquote>
          <section id="registering-custom-converters">
           <span id="custom-converters">
           </span>
           <h4 id="registering-custom-converters">
            Registering Custom Converters
            <a class="headerlink" href="#registering-custom-converters" title="Permalink to this headline">
             ¶
            </a>
           </h4>
           <p>
            Operations are mapped to TensorRT through the use of modular converters, functions that take a node from the JIT graph and produce an equivalent layer or subgraph in TensorRT.
Torch-TensorRT ships with a library of these converters stored in a registry, that will be executed depending on the node being parsed. For instance a
            <code class="docutils literal notranslate">
             <span class="pre">
              aten::relu(%input0.4)
             </span>
            </code>
            instruction will trigger
the relu converter to be run on it, producing an activation layer in the TensorRT graph. But since this library is not exhaustive you may need to write your own to get Torch-TensorRT
to support your module.
           </p>
           <p>
            Shipped with the Torch-TensorRT distribution are the internal core API headers. You can therefore access the converter registry and add a converter for the op you need.
           </p>
           <p>
            For example, if we try to compile a graph with a build of Torch-TensorRT that doesn’t support the flatten operation (
            <code class="docutils literal notranslate">
             <span class="pre">
              aten::flatten
             </span>
            </code>
            ) you may see this error:
           </p>
           <div class="highlight-none notranslate">
            <div class="highlight">
             <pre><span></span>terminate called after throwing an instance of 'torch_tensorrt::Error'
what():  [enforce fail at core/conversion/conversion.cpp:109] Expected converter to be true but got false
Unable to convert node: %input.1 : Tensor = aten::flatten(%x.1, %11, %5) # x.py:25:0 (conversion.AddLayer)
Schema: aten::flatten.using_ints(Tensor self, int start_dim=0, int end_dim=-1) -&gt; (Tensor)
Converter for aten::flatten requested, but no such converter was found.
If you need a converter for this operator, you can try implementing one yourself
or request a converter: https://www.github.com/NVIDIA/Torch-TensorRT/issues
</pre>
            </div>
           </div>
           <p>
            We can register a converter for this operator in our application. All of the tools required to build a converter can be imported by including
            <code class="docutils literal notranslate">
             <span class="pre">
              torch_tensorrt/core/conversion/converters/converters.h
             </span>
            </code>
            .
We start by creating an instance of the self-registering class
            <code class="docutils literal notranslate">
             <span class="pre">
              torch_tensorrt::core::conversion::converters::RegisterNodeConversionPatterns()
             </span>
            </code>
            which will register converters
in the global converter registry, associating a function schema like
            <code class="docutils literal notranslate">
             <span class="pre">
              aten::flatten.using_ints(Tensor
             </span>
             <span class="pre">
              self,
             </span>
             <span class="pre">
              int
             </span>
             <span class="pre">
              start_dim=0,
             </span>
             <span class="pre">
              int
             </span>
             <span class="pre">
              end_dim=-1)
             </span>
             <span class="pre">
              -&gt;
             </span>
             <span class="pre">
              (Tensor)
             </span>
            </code>
            with a lambda that
will take the state of the conversion, the node/operation in question to convert, and all of the inputs to the node, and produce as a side effect a new layer in the TensorRT network.
Arguments are passed as a vector of inspectable unions of TensorRT
            <code class="docutils literal notranslate">
             <span class="pre">
              ITensors
             </span>
            </code>
            and Torch
            <code class="docutils literal notranslate">
             <span class="pre">
              IValues
             </span>
            </code>
            in the order arguments are listed in the schema.
           </p>
           <p>
            Below is an implementation of an
            <code class="docutils literal notranslate">
             <span class="pre">
              aten::flatten
             </span>
            </code>
            converter that we can use in our application. You have full access to the Torch and TensorRT libraries in the converter implementation. So
for example, we can quickly get the output size by just running the operation in PyTorch instead of implementing the full calculation ourselves, as we do below for this flatten converter.
           </p>
           <div class="highlight-c++ notranslate">
            <div class="highlight">
             <pre><span></span><span class="cp">#include</span><span class="w"> </span><span class="cpf">"torch/script.h"</span><span class="cp"></span>
<span class="cp">#include</span><span class="w"> </span><span class="cpf">"torch_tensorrt/torch_tensorrt.h"</span><span class="cp"></span>
<span class="cp">#include</span><span class="w"> </span><span class="cpf">"torch_tensorrt/core/conversion/converters/converters.h"</span><span class="cp"></span>

<span class="k">static</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">flatten_converter</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">torch_tensorrt</span><span class="o">::</span><span class="n">core</span><span class="o">::</span><span class="n">conversion</span><span class="o">::</span><span class="n">converters</span><span class="o">::</span><span class="n">RegisterNodeConversionPatterns</span><span class="p">()</span><span class="w"></span>
<span class="w">    </span><span class="p">.</span><span class="n">pattern</span><span class="p">({</span><span class="w"></span>
<span class="w">        </span><span class="s">"aten::flatten.using_ints(Tensor self, int start_dim=0, int end_dim=-1) -&gt; (Tensor)"</span><span class="p">,</span><span class="w"></span>
<span class="w">        </span><span class="p">[](</span><span class="n">torch_tensorrt</span><span class="o">::</span><span class="n">core</span><span class="o">::</span><span class="n">conversion</span><span class="o">::</span><span class="n">ConversionCtx</span><span class="o">*</span><span class="w"> </span><span class="n">ctx</span><span class="p">,</span><span class="w"></span>
<span class="w">           </span><span class="k">const</span><span class="w"> </span><span class="n">torch</span><span class="o">::</span><span class="n">jit</span><span class="o">::</span><span class="n">Node</span><span class="o">*</span><span class="w"> </span><span class="n">n</span><span class="p">,</span><span class="w"></span>
<span class="w">           </span><span class="n">torch_tensorrt</span><span class="o">::</span><span class="n">core</span><span class="o">::</span><span class="n">conversion</span><span class="o">::</span><span class="n">converters</span><span class="o">::</span><span class="n">args</span><span class="o">&amp;</span><span class="w"> </span><span class="n">args</span><span class="p">)</span><span class="w"> </span><span class="o">-&gt;</span><span class="w"> </span><span class="kt">bool</span><span class="w"> </span><span class="p">{</span><span class="w"></span>
<span class="w">            </span><span class="k">auto</span><span class="w"> </span><span class="n">in</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">args</span><span class="p">[</span><span class="mi">0</span><span class="p">].</span><span class="n">ITensor</span><span class="p">();</span><span class="w"></span>
<span class="w">            </span><span class="k">auto</span><span class="w"> </span><span class="n">start_dim</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">args</span><span class="p">[</span><span class="mi">1</span><span class="p">].</span><span class="n">unwrapToInt</span><span class="p">();</span><span class="w"></span>
<span class="w">            </span><span class="k">auto</span><span class="w"> </span><span class="n">end_dim</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">args</span><span class="p">[</span><span class="mi">2</span><span class="p">].</span><span class="n">unwrapToInt</span><span class="p">();</span><span class="w"></span>
<span class="w">            </span><span class="k">auto</span><span class="w"> </span><span class="n">in_shape</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">torch_tensorrt</span><span class="o">::</span><span class="n">core</span><span class="o">::</span><span class="n">util</span><span class="o">::</span><span class="n">toVec</span><span class="p">(</span><span class="n">in</span><span class="o">-&gt;</span><span class="n">getDimensions</span><span class="p">());</span><span class="w"></span>
<span class="w">            </span><span class="k">auto</span><span class="w"> </span><span class="n">out_shape</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">torch</span><span class="o">::</span><span class="n">flatten</span><span class="p">(</span><span class="n">torch</span><span class="o">::</span><span class="n">rand</span><span class="p">(</span><span class="n">in_shape</span><span class="p">),</span><span class="w"> </span><span class="n">start_dim</span><span class="p">,</span><span class="w"> </span><span class="n">end_dim</span><span class="p">).</span><span class="n">sizes</span><span class="p">();</span><span class="w"></span>

<span class="w">            </span><span class="k">auto</span><span class="w"> </span><span class="n">shuffle</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ctx</span><span class="o">-&gt;</span><span class="n">net</span><span class="o">-&gt;</span><span class="n">addShuffle</span><span class="p">(</span><span class="o">*</span><span class="n">in</span><span class="p">);</span><span class="w"></span>
<span class="w">            </span><span class="n">shuffle</span><span class="o">-&gt;</span><span class="n">setReshapeDimensions</span><span class="p">(</span><span class="n">torch_tensorrt</span><span class="o">::</span><span class="n">core</span><span class="o">::</span><span class="n">util</span><span class="o">::</span><span class="n">toDims</span><span class="p">(</span><span class="n">out_shape</span><span class="p">));</span><span class="w"></span>
<span class="w">            </span><span class="n">shuffle</span><span class="o">-&gt;</span><span class="n">setName</span><span class="p">(</span><span class="n">torch_tensorrt</span><span class="o">::</span><span class="n">core</span><span class="o">::</span><span class="n">util</span><span class="o">::</span><span class="n">node_info</span><span class="p">(</span><span class="n">n</span><span class="p">).</span><span class="n">c_str</span><span class="p">());</span><span class="w"></span>

<span class="w">            </span><span class="k">auto</span><span class="w"> </span><span class="n">out_tensor</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ctx</span><span class="o">-&gt;</span><span class="n">AssociateValueAndTensor</span><span class="p">(</span><span class="n">n</span><span class="o">-&gt;</span><span class="n">outputs</span><span class="p">()[</span><span class="mi">0</span><span class="p">],</span><span class="w"> </span><span class="n">shuffle</span><span class="o">-&gt;</span><span class="n">getOutput</span><span class="p">(</span><span class="mi">0</span><span class="p">));</span><span class="w"></span>
<span class="w">            </span><span class="k">return</span><span class="w"> </span><span class="nb">true</span><span class="p">;</span><span class="w"></span>
<span class="w">        </span><span class="p">}</span><span class="w"></span>
<span class="w">    </span><span class="p">});</span><span class="w"></span>

<span class="kt">int</span><span class="w"> </span><span class="nf">main</span><span class="p">()</span><span class="w"> </span><span class="p">{</span><span class="w"></span>
<span class="w">    </span><span class="p">...</span><span class="w"></span>
</pre>
            </div>
           </div>
           <p>
            To use this converter in Python, it is recommended to use PyTorch’s
            <a class="reference external" href="https://pytorch.org/tutorials/advanced/cpp_extension.html#custom-c-and-cuda-extensions">
             C++ / CUDA Extension
            </a>
            template to wrap your library of converters into a
            <code class="docutils literal notranslate">
             <span class="pre">
              .so
             </span>
            </code>
            that you can load with
            <code class="docutils literal notranslate">
             <span class="pre">
              ctypes.CDLL()
             </span>
            </code>
            in your Python application.
           </p>
           <p>
            You can find more information on all the details of writing converters in the contributors documentation (
            <a class="reference internal" href="../contributors/writing_converters.html#writing-converters">
             <span class="std std-ref">
              Writing Converters
             </span>
            </a>
            ).
If you find yourself with a large library of converter implementations, do consider upstreaming them; PRs are welcome and it would be great for the community to benefit as well.
           </p>
          </section>
         </section>
        </section>
       </section>
      </article>
     </div>
    </div>
   </main>
  </div>
  <footer class="md-footer">
   <div class="md-footer-nav">
    <nav class="md-footer-nav__inner md-grid">
     <a class="md-flex md-footer-nav__link md-footer-nav__link--prev" href="installation.html" rel="prev" title="Installation">
      <div class="md-flex__cell md-flex__cell--shrink">
       <i class="md-icon md-icon--arrow-back md-footer-nav__button">
       </i>
      </div>
      <div class="md-flex__cell md-flex__cell--stretch md-footer-nav__title">
       <span class="md-flex__ellipsis">
        <span class="md-footer-nav__direction">
         Previous
        </span>
        Installation
       </span>
      </div>
     </a>
     <a class="md-flex md-footer-nav__link md-footer-nav__link--next" href="getting_started_with_python_api.html" rel="next" title="Using Torch-TensorRT in Python">
      <div class="md-flex__cell md-flex__cell--stretch md-footer-nav__title">
       <span class="md-flex__ellipsis">
        <span class="md-footer-nav__direction">
         Next
        </span>
        Using Torch-TensorRT in Python
       </span>
      </div>
      <div class="md-flex__cell md-flex__cell--shrink">
       <i class="md-icon md-icon--arrow-forward md-footer-nav__button">
       </i>
      </div>
     </a>
    </nav>
   </div>
   <div class="md-footer-meta md-typeset">
    <div class="md-footer-meta__inner md-grid">
     <div class="md-footer-copyright">
      <div class="md-footer-copyright__highlight">
       © Copyright 2021, NVIDIA Corporation.
      </div>
      Created using
      <a href="https://www.sphinx-doc.org/">
       Sphinx
      </a>
      4.3.0
             and
      <a href="https://github.com/bashtage/sphinx-material/">
       Material for
              Sphinx
      </a>
     </div>
    </div>
   </div>
  </footer>
  <script src="../_static/javascripts/application.js">
  </script>
  <script>
   app.initialize({version: "1.0.4", url: {base: ".."}})
  </script>
 </body>
</html>